#########################################################################################################
# Cleaning File for When Economic Elites Support Democracy
#########################################################################################################

########################################################################
# --- Create block-level weights (inverse probability weights based on probability of treatment in each province) ---
########################################################################

# Collapse the data to one row per electoral district (province x circ),
# taking the within-district mean of each treatment coding.
# NOTE: `dist` is left in the workspace after this section (and masks
# stats::dist while it exists).
dist <- dat %>%
  group_by(province, circ) %>%
  summarize(
    treat_strict = mean(treat_strict),
    treat = mean(treat)
  )

# Probability of treatment in each province: the mean of the district-level
# treatment values within the province, computed for both codings.
province_probs <- dist %>%
  group_by(province) %>%
  summarize(
    treat_prob = mean(treat),
    treatstrict_prob = mean(treat_strict)
  )

# Attach the province-level probabilities to every row of the main data,
# then drop the temporary lookup table.
dat <- dat %>% full_join(province_probs, by = "province")
rm(province_probs)

# Inverse probability weights for both codings of treatment.
# Treated rows get 1 / P(treatment); control rows get 1 / (1 - P(treatment)).
# Rows whose treatment value is neither 1 nor 0 (including NA) match no
# branch of case_when() and therefore receive NA.
dat <- mutate(
  dat,
  # weights for the main treatment coding
  ipw = case_when(
    treat == 1 ~ 1 / treat_prob,
    treat == 0 ~ 1 / (1 - treat_prob)
  ),
  # weights for the alternative (strict) treatment coding
  ipw_strict = case_when(
    treat_strict == 1 ~ 1 / treatstrict_prob,
    treat_strict == 0 ~ 1 / (1 - treatstrict_prob)
  )
)
  
########################################################################
# --- Create variables from 1895 census -----------------------------------
########################################################################

# Derived 1895-census measures, appended to `dat` in the order listed.
dat <- dat %>%
  mutate(
    # counts expressed as a share of total population
    realest_arg_prop_c95 = realestate_owner_arg_c95 / population_c95,
    realest_prop_c95 = realestate_owner_c95 / population_c95,
    national_guard_prop_c95 = national_guard_c95 / population_c95,
    arg_men_prop_c95 = arg_men_c95 / population_c95,
    urban_prop_c95 = urban_c95 / population_c95,
    # literacy uses the `sixabove_c95` count as its denominator,
    # not total population
    lit_prop_c95 = literate_all_c95 / sixabove_c95,
    # total acreage across the eight recorded crops; a missing value in any
    # single crop makes the total NA
    ag_acre = grape_acre_c95 + peanut_acre_c95 + sugar_acre_c95 +
      tobaco_acre_c95 + cotton_acre_c95 + wheat_acre_c95 +
      corn_acre_c95 + flax_acre_c95,
    # medianero farms as a share of medianero + renter farms
    sharecrop_rentfarm_c95 = medianero_farms_c95 /
      (medianero_farms_c95 + renter_farms_c95),
    # medianero farms as a share of all farms (medianero + renter + owner)
    sharecrop_allfarm_c95 = medianero_farms_c95 /
      (medianero_farms_c95 + renter_farms_c95 + owner_farms_c95)
  )

########################################################################
# --- Median-splits for labor control potential (two measures) ---
########################################################################

# Create High/Low potential for labor control variable: a median split of
# sharecrop_rentfarm_c95. Rows strictly above the sample median (computed
# ignoring NAs) are "High", all other non-missing rows are "Low", and rows
# with a missing share stay NA.
#
# The split is computed directly on `dat` rather than on a copy that is then
# joined back by (province, loc, sharecrop_rentfarm_c95): that self-join
# duplicates rows whenever a key combination occurs more than once (NA keys
# match each other in dplyr joins), silently inflating the data set.
dat <- dat %>%
  mutate(
    sharecrop_rent_bin = ifelse(
      sharecrop_rentfarm_c95 > median(sharecrop_rentfarm_c95, na.rm = TRUE),
      "High",
      "Low"
    )
  )

# Create High/Low potential for the alternate labor control variable: a
# median split of sharecrop_allfarm_c95. Rows strictly above the sample
# median (computed ignoring NAs) are "High", all other non-missing rows are
# "Low", and rows with a missing share stay NA.
#
# The split is computed directly on `dat` rather than on a copy that is then
# joined back by (province, loc, sharecrop_allfarm_c95): that self-join
# duplicates rows whenever a key combination occurs more than once (NA keys
# match each other in dplyr joins), silently inflating the data set.
dat <- dat %>%
  mutate(
    sharecrop_all_bin = ifelse(
      sharecrop_allfarm_c95 > median(sharecrop_allfarm_c95, na.rm = TRUE),
      "High",
      "Low"
    )
  )

########################################################################
# --- Outcome variables: binary & count versions for committees/leagues ---
########################################################################

# Binary and count outcomes, appended to `dat`; the raw rsp_committee_nbr
# column is dropped once rsp_nbr has been built from it.
# NOTE(review): a missing count stays NA in all three new columns (ifelse
# propagates NA), so rsp_nbr carries the same values as rsp_committee_nbr.
# If missing counts were meant to become 0, that is not what happens here.
dat <- dat %>%
  mutate(
    # 1 if liga_chapter_nbr is non-zero, 0 if it is zero
    liga_bin = ifelse(liga_chapter_nbr == 0, 0, 1),
    # 1 if rsp_committee_nbr is non-zero, 0 if it is zero
    rsp_bin = ifelse(rsp_committee_nbr == 0, 0, 1),
    # count version: 0 where rsp_bin is 0, otherwise the recorded count
    rsp_nbr = ifelse(rsp_bin == 0, 0, rsp_committee_nbr),
    # remove the raw count column
    rsp_committee_nbr = NULL
  )

########################################################################
# --- Convert logicals/factors to numeric where appropriate (to match original) ---
########################################################################
# Coerce every logical column and every factor column to numeric in one pass.
# Logicals become 0/1. Factors become their integer level codes, not the
# values shown by their labels; per the section header this is intended so
# the output matches the original data.
dat <- mutate(
  dat,
  across(where(is.logical) | where(is.factor), as.numeric)
)

########################
# --- Create cluster (electoral district) and region variables ---
########################

dat <- mutate(
  dat,
  # cluster id at the level of treatment assignment (electoral district):
  # province and circ pasted together with a single space
  clust_uni = paste(province, circ),
  # capital vs. provinces split used for heterogeneous treatment effects:
  # "bsas" for the federal district, "provinces" for everything else
  region = factor(
    if_else(province == "federal district", "bsas", "provinces")
  )
)


